Cadences#

Hide imports
import os
from collections import defaultdict, Counter

from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from utils import STD_LAYOUT, CADENCE_COLORS, color_background, value_count_df, get_repo_name, print_heading, resolve_dir
Hide source
# The corpus is expected two directory levels above the current working directory.
CORPUS_PATH = os.path.abspath(os.path.join('..', '..'))
print_heading("Notebook settings")
print(f"CORPUS_PATH: {CORPUS_PATH!r}")
# resolve_dir comes from the local utils module; presumably it normalizes the path — TODO confirm.
CORPUS_PATH = resolve_dir(CORPUS_PATH)
Notebook settings
-----------------

CORPUS_PATH: '/home/runner/work/workflow_deployment/beethoven_piano_sonatas'
Hide source
# Report the data repository's current commit (first 7 hex digits) and the
# versions of the analysis libraries used to produce this notebook.
repo = Repo(CORPUS_PATH)
print_heading("Data and software versions")
print(f"Data repo '{get_repo_name(repo)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Data and software versions
--------------------------

Data repo 'beethoven_piano_sonatas' @ 0dd51b2
dimcat version 0.3.0
ms3 version 2.2.1
# Create an empty dimcat Dataset and load the corpus found at CORPUS_PATH.
# NOTE(review): parse_tsv=False presumably defers TSV parsing — confirm against the dimcat docs.
dataset = dc.Dataset()
dataset.load(directory=CORPUS_PATH, parse_tsv=False)
[annotated|all|default]
All corpora
-----------
View: This view is called 'annotated'. It 
	- excludes pieces that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml),
	- excludes review files and folders, and
	- includes only facets containing 'expanded'.

                             has     active expanded       
                        metadata       view detected parsed
corpus                                                     
beethoven_piano_sonatas      yes  annotated       64     64

11/12 facets are excluded from this view.
N = 64 annotated pieces, 64 parsed dataframes.

Metadata#

# Fetch the metadata of the loaded pieces and stop early if nothing was selected.
all_metadata = dataset.data.metadata()
assert len(all_metadata) > 0, "No pieces selected for analysis."
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.count_pieces()} scores.")
# Preview: first row per value of index level 0, restricted to the first 20 columns.
all_metadata.reset_index(level=1).groupby(level=0).nth(0).iloc[:,:20]
Concatenated 'metadata.tsv' files cover 64 of the 64 scores.
piece TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers
corpus
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308 304 1216.0 1476.0 1679 985 0 0 241 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN

All annotation labels from the selected pieces#

# Retrieve the 'expanded' facet (the harmony annotation table) for all loaded pieces.
all_labels = dataset.data.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Show the first 20 rows; color_background (from utils) styles the "chord" column.
all_labels.iloc[:20].style.apply(color_background, subset="chord")
21962 hand-annotated harmony labels:
      mc mn quarterbeats quarterbeats_all_endings duration_qb mc_onset mn_onset timesig staff voice label alt_label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus piece i                                                                  
beethoven_piano_sonatas 01-1 0 1 0 0 0 9.000000 0 3/4 2/2 2 1 f.i{ nan f i nan i nan i nan nan nan nan nan { m True True (0, -3, 1) () 0 0 nan
1 4 3 9 9 8.000000 0 0 2/2 2 1 V65 nan f i nan V65 nan V nan 65 nan nan nan nan Mm7 True True (5, 2, -1, 1) () 1 5 nan
2 6 5 17 17 4.000000 0 0 2/2 2 1 i nan f i nan i nan i nan nan nan nan nan nan m True True (0, -3, 1) () 0 0 nan
3 7 6 21 21 4.000000 0 0 2/2 2 1 #viio6 nan f i nan #viio6 nan #vii o 6 nan nan nan nan o True True (2, -1, 5) () 5 2 nan
4 8 7 25 25 2.000000 0 0 2/2 2 1 i6 nan f i nan i6 nan i nan 6 nan nan nan nan m True True (-3, 1, 0) () 0 -3 nan
5 8 7 27 27 2.000000 1/2 1/2 2/2 2 1 iio6 nan f i nan iio6 nan ii o 6 nan nan nan nan o True True (-1, -4, 2) () 2 -1 nan
6 9 8 29 29 1.000000 0 0 2/2 2 1 V(4)} nan f i nan V(4) nan V nan nan 4 nan nan } M True True (1, 0, 2) () 1 1 nan
7 9 8 30 30 2.000000 1/4 1/4 2/2 2 1 V|HC nan f i nan V nan V nan nan nan nan HC nan M True True (1, 5, 2) () 1 1 nan
8 9 8 32 32 9.000000 3/4 3/4 2/2 2 1 v{ nan f i nan v nan v nan nan nan nan nan { m True True (1, -2, 2) () 1 1 nan
9 12 11 41 41 4.000000 0 0 2/2 2 1 III.IVM2 ii7(2) f III nan IVM2 nan IV M 2 nan nan nan nan MM7 True False (4, -1, 3, 0) () -1 4 nan
10 13 12 45 45 4.000000 0 0 2/2 2 1 ii7 nan f III nan ii7 nan ii nan 7 nan nan nan nan mm7 True False (2, -1, 3, 0) () 2 2 nan
11 14 13 49 49 4.000000 0 0 2/2 2 1 V43 nan f III nan V43 nan V nan 43 nan nan nan nan Mm7 True False (2, -1, 1, 5) () 1 2 nan
12 15 14 53 53 4.000000 0 0 2/2 2 1 I nan f III nan I nan I nan nan nan nan nan nan M True False (0, 4, 1) () 0 0 nan
13 16 15 57 57 1.000000 0 0 2/2 2 1 ii6(2) nan f III nan ii6(2) nan ii nan 6 2 nan nan nan m True False (-1, 3, 4) () 2 -1 nan
14 16 15 58 58 1.000000 1/4 1/4 2/2 2 1 ii6 nan f III nan ii6 nan ii nan 6 nan nan nan nan m True False (-1, 3, 2) () 2 -1 nan
15 16 15 59 59 2.000000 1/2 1/2 2/2 2 1 V65/V nan f III nan V65/V nan V nan 65 nan V nan nan Mm7 True False (6, 3, 0, 2) () 2 6 nan
16 17 16 61 61 3.000000 0 0 2/2 2 1 V|HC} nan f III nan V nan V nan nan nan nan HC } M True False (1, 5, 2) () 1 1 nan
17 17 16 64 64 1.000000 3/4 3/4 2/2 2 1 I6{ nan f III nan I6 nan I nan 6 nan nan nan { M True False (4, 1, 0) () 0 4 nan
18 18 17 65 65 1.000000 0 0 2/2 2 1 ii6(2) nan f III nan ii6(2) nan ii nan 6 2 nan nan nan m True False (-1, 3, 4) () 2 -1 nan
19 18 17 66 66 1.000000 1/4 1/4 2/2 2 1 ii6 nan f III nan ii6 nan ii nan 6 nan nan nan nan m True False (-1, 3, 2) () 2 -1 nan

Filtering out pieces without cadence annotations#

# Apply dimcat's HasCadenceAnnotationsFilter to the dataset.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
# The ungrouped piece IDs are stored under the empty-tuple key of `.indices`.
assert () in hascadence.indices and len(hascadence.indices[()]) > 0, "No cadences found."
print(f"Before: {len(dataset.indices[()])} pieces; after removing those without cadence labels: {len(hascadence.indices[()])}")
Before: 64 pieces; after removing those without cadence labels: 64

Show corpora containing pieces with cadence annotations#

# Group the filtered pieces with dimcat's CorpusGrouper; `.indices` then maps
# group tuples to collections of piece IDs.
grouped_by_corpus = dc.CorpusGrouper().process_data(hascadence)
# Human-readable summary keyed by the first element of each group tuple.
corpora = {group[0]: f"{len(ixs)} pieces" for group, ixs in  grouped_by_corpus.indices.items()}
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_corpus.indices.values()))} pieces containing cadence annotations:")
corpora
1 corpora with 64 pieces containing cadence annotations:
{'beethoven_piano_sonatas': '64 pieces'}

All annotation labels from the selected pieces#

# Re-bind all_labels to the 'expanded' facet of the cadence-filtered dataset;
# all later cells that use all_labels refer to this table.
all_labels = hascadence.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first 10 rows, columns from position 13 onwards.
all_labels.iloc[:10, 13:].style.apply(color_background, subset="chord")
21787 hand-annotated harmony labels:
      localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend
corpus fname interval                                        
beethoven_piano_sonatas 01-1 [0.0, 9.0) i nan i nan i nan nan nan nan nan { m True True (0, -3, 1) () 0 0 nan
[9.0, 17.0) i nan V65 nan V nan 65 nan nan nan nan Mm7 True True (5, 2, -1, 1) () 1 5 nan
[17.0, 21.0) i nan i nan i nan nan nan nan nan nan m True True (0, -3, 1) () 0 0 nan
[21.0, 25.0) i nan #viio6 nan #vii o 6 nan nan nan nan o True True (2, -1, 5) () 5 2 nan
[25.0, 27.0) i nan i6 nan i nan 6 nan nan nan nan m True True (-3, 1, 0) () 0 -3 nan
[27.0, 29.0) i nan iio6 nan ii o 6 nan nan nan nan o True True (-1, -4, 2) () 2 -1 nan
[29.0, 30.0) i nan V(4) nan V nan nan 4 nan nan } M True True (1, 0, 2) () 1 1 nan
[30.0, 32.0) i nan V nan V nan nan nan nan HC nan M True True (1, 5, 2) () 1 1 nan
[32.0, 41.0) i nan v nan v nan nan nan nan nan { m True True (1, -2, 2) () 1 1 nan
[41.0, 45.0) III nan IVM2 nan IV M 2 nan nan nan nan MM7 True False (4, -1, 3, 0) () -1 4 nan

Metadata#

# Restrict the metadata to the pieces that passed the cadence filter.
dataset_metadata = hascadence.data.metadata()
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# Index level 0 is renamed to 'dataset'; a later groupby(level='dataset') relies on this name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb ... ambitus imslp musicbrainz viaf wikidata originalFormat staff_1_ambitus staff_1_instrument staff_2_ambitus staff_2_instrument
dataset piece
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308 304 1216.0 1476.00 ... 32-89 (Ab1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/a78520e0-0211-3b5... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 xml 51-89 (Eb3-F6) piano 32-73 (Ab1-Db5) piano
01-2 1: 3/4 1: -1 62 61 183.0 124 122 366.0 526.17 ... 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/bea1b893-2732-33a... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 xml 43-89 (G2-F6) piano 31-77 (G1-F5) piano
01-3 1: 3/4 1: -4, 43: -1 77 73 219.0 196 186 558.0 565.50 ... 31-85 (G1-Db6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/2bd7e1ea-c696-3be... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 mxl 53-85 (F3-Db6) Piano 31-74 (G1-D5) Piano
01-4 1: 2/2 1: -4 199 196 790.0 392 390 1560.0 [[[57, 58], [59, 60, 61]]] 2326.83 ... 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/b755e900-804a-312... https://viaf.org/viaf/179625665 https://www.wikidata.org/wiki/Q145813 mxl 50-89 (D3-F6) Piano 31-75 (G1-Eb5) Piano
02-1 1: 2/4 1: 3, 127: 0, 230: 3 342 336 679.5 672 664 1336.0 [[[115, 116, 117, 118], [119, 120, 121, 122, 1... 1695.75 ... 31-89 (G1-F6) https://imslp.org/wiki/Klaviersonaten_(Beethov... https://musicbrainz.org/work/c001a2eb-9493-327... https://viaf.org/viaf/179221580 https://www.wikidata.org/wiki/Q145699 xml 39-89 (D#2-F6) piano 31-76 (G1-E5) piano

5 rows × 55 columns

# Mean of the 'composed_end' metadata column per dataset (index level 0), truncated to int.
# NOTE(review): .astype(int) raises if a dataset's mean is NaN — confirm composed_end is always filled.
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
# Dataset names sorted by ascending mean year; reused below for ordering plot categories.
chronological_order = mean_composition_years.index.to_list()
# One row per dataset with its mean year and its number of pieces.
bar_data = pd.concat([mean_composition_years.rename('year'), 
                      hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
                     axis=1
                    ).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
fig.update_traces(width=5)

Overall#

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence

# Count the non-null entries of the 'cadence' column, then tabulate them by type
# using value_count_df from the local utils module.
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
value_count_df(all_labels.cadence)
1333 cadence labels.
counts %
cadence
PAC 603 0.452363
HC 406 0.304576
IAC 275 0.206302
EC 31 0.023256
DC 15 0.011253
PC 3 0.002251
# Pie chart over all rows carrying a cadence label, colored via CADENCE_COLORS.
px.pie(all_labels[all_labels.cadence.notna()], names="cadence", color="cadence", color_discrete_map=CADENCE_COLORS)

Per dataset#

# Absolute number of labels per cadence type within each corpus.
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
# Divide by the per-corpus totals to obtain each cadence type's share.
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# category_orders is keyed by column name. The plotted frame has a 'corpus'
# column (not 'dataset'), so the key must be 'corpus' for the chronological
# ordering to take effect.
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
      color_discrete_map=CADENCE_COLORS, category_orders=dict(corpus=chronological_order))
# One pie chart per corpus (facet columns, wrapped after 4) showing absolute cadence counts.
fig = px.pie(cadence_count_per_dataset.rename('count').reset_index(), names='cadence', color='cadence', values='count', 
       facet_col='corpus', facet_col_wrap=4, height=2000, color_discrete_map=CADENCE_COLORS)
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)

Per phrase#

Number of cadences per phrase#

# Slice the corpus-grouped data with dimcat's PhraseSlicer.
# NOTE(review): presumably one slice per annotated phrase — confirm against the dimcat docs.
segmented = dc.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")
# Group by the first three index levels (assumed corpus, piece, phrase interval — TODO confirm).
phrase_gpb = phrase_segments.groupby(level=[0,1,2])
# Tuple of distinct local keys per group, and how many there are.
local_keys_per_phrase = phrase_gpb.localkey.unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat([n_local_keys_per_phrase.rename('n_local_keys'),
                               local_keys_per_phrase.rename('local_keys'),
                               phrases], axis=1)
# n_cadences counts distinct cadence labels per group; 'cadences' lists them with nulls removed.
phrases_with_cadences = pd.concat([
    phrase_gpb.cadence.nunique().rename('n_cadences'),
    phrase_gpb.cadence.unique().rename('cadences').map(lambda l: tuple(e for e in l if not pd.isnull(e))),
    phrases_with_keys
], axis=1)
value_count_df(phrases_with_cadences.n_cadences, counts="#phrases")
#phrases %
n_cadences
1 1258 0.937407
0 59 0.043964
2 25 0.018629
# Per corpus: how many phrases contain 0, 1, 2, ... distinct cadence labels.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to str so that plotly treats the number of cadences as a discrete color category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# category_orders is keyed by column name. The x column is 'corpus' (not
# 'dataset'), so the key must be 'corpus' for the chronological ordering to apply.
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
      )
fig.show()

Combinations of cadence types for phrases with more than one cadence#

# Tabulate the cadence-type tuples of phrases holding more than one distinct cadence label.
value_count_df(phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences)
counts %
cadences
(EC, PAC) 7 0.28
(HC, PAC) 5 0.20
(DC, PAC) 2 0.08
(PAC, HC) 2 0.08
(IAC, PAC) 2 0.08
(EC, IAC) 2 0.08
(DC, IAC) 1 0.04
(IAC, EC) 1 0.04
(HC, DC) 1 0.04
(EC, HC) 1 0.04
(DC, HC) 1 0.04

Positioning of cadences within phrases#

# Build one plot row per phrase that has at least one cadence, ordered by phrase
# duration. Each row gets an "end of phrase" marker plus one marker per cadence label.
df_rows = []
y_position = 0
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)
    if df.cadence.notna().any():
        # Third index element; used via .length and .left, so presumably a pandas Interval — TODO confirm.
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # Express cadence positions relative to the start of the phrase.
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        y_position += 1
    #else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))
    
# Scatter plot: x = offset from phrase start, y = running phrase number,
# color = cadence type or the "end of phrase" marker.
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                labels=dict(marker='legend'), color_discrete_map=CADENCE_COLORS)
fig.update_traces(marker_size=5)
# Reverse the y axis so that the first (shortest) phrase is drawn at the top.
fig.update_yaxes(autorange="reversed")
fig.show()

Cadence ultima#

# Re-fetch the phrase-sliced labels, then locate rows that carry a cadence label
# but no chord value.
phrase_segments = segmented.get_facet("expanded")
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
# NOTE(review): `missing` is not used again in this cell; the same selection is repeated in the call below.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Run ms3's label expansion on those rows only and write the result back.
# NOTE(review): presumably this fills chord columns from the label string — confirm against the ms3 docs.
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
Ultima harmony missing for 1 cadence labels.

Ultimae as Roman numeral#

def highlight(row, color="#ffffb3"):
    """Return per-cell CSS for a four-column row, highlighting frequent entries.

    Written for use as a row-wise styler callback, as in the commented-out
    calls ``ultima_root.style.apply(highlight, axis=1)``.

    Args:
        row: Object exposing a ``counts`` attribute (e.g. a DataFrame row).
        color: CSS color used as background for highlighted rows.

    Returns:
        A list of four entries: ``None`` for each cell when ``row.counts`` is
        below 10, otherwise the same ``background-color`` declaration four times.
    """
    if row.counts < 10:
        return [None] * 4
    # f-string: the original plain string emitted the literal text "{color}".
    return [f"background-color: {color};"] * 4

# Cadence types by overall frequency; the index is used to order the facet rows.
cadence_counts = all_labels.cadence.value_counts()
# Count the 'numeral' values per (localkey_is_minor, cadence) combination.
ultima_root = phrase_segments.groupby(['localkey_is_minor', 'cadence']).numeral.value_counts().rename('counts').to_frame().reset_index()
# Replace the boolean by a readable facet title.
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_root, names='numeral', values='counts', 
             facet_row='cadence', facet_col='localkey_is_minor', 
             height=1500,
             category_orders={'cadence': cadence_counts.index},
            )
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)

Ultimae bass note as scale degree#

# Count the 'bass_note' values per (localkey_is_minor, cadence) combination.
ultima_bass = phrase_segments.groupby(['localkey_is_minor','cadence']).bass_note.value_counts().rename('counts').reset_index()
# Convert bass notes to scale degrees with ms3.fifths2sd, passing each row's mode as `minor`.
# NOTE(review): bass_note is assumed to be encoded as a number of fifths — confirm against the ms3 docs.
ultima_bass.bass_note = ms3.transform(ultima_bass, ms3.fifths2sd, dict(fifths='bass_note', minor='localkey_is_minor'))
# Replace the boolean by a readable facet title.
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_bass, names='bass_note', values='counts', 
             facet_row='cadence', facet_col='localkey_is_minor', 
             height=1500, 
             category_orders={'cadence': cadence_counts.index},
            )
# Keep only the part after the last "=" in each facet title.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()

Chord progressions#

PACs with ultima I/i#

def remove_immediate_duplicates(l):
    """Collapse runs of consecutive equal elements into a single element.

    Args:
        l: Any iterable (tuple, list, Series, ...). The original implementation
            only accepted tuples because it concatenated ``(None,) + l``.

    Returns:
        A tuple containing every element that differs (``!=``) from its
        immediate predecessor. The first element is always kept, even if it
        is ``None``.
    """
    items = tuple(l)
    return tuple(
        current
        for position, current in enumerate(items)
        # Compare with the real predecessor instead of a None sentinel, so that
        # a leading None is not mistaken for a duplicate.
        if position == 0 or current != items[position - 1]
    )

def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True, remove_duplicates=False):
    """Collect the ``feature`` values leading up to each cadence of one type.

    Uses the module-level variable ``phrase_segments``.

    Rows where ``feature`` is null are dropped. The remainder is grouped by the
    first three index levels, and within each group the rows are cut into
    segments that each end on a cadence label. A segment is returned if its
    final row has ``cadence == selected`` and satisfies ``last_row``.

    Args:
        selected: Cadence label a segment must end on.
        last_row: Optional mapping ``{column: value or tuple of values}`` that
            the final row of a segment has to match. ``None`` means no constraint.
        feature: Column whose values make up the progressions.
        dataset: If given, groups whose first index element does not contain
            this value (``in`` test) are skipped.
        as_series: Return a ``pd.Series`` of dtype object instead of a list.
        remove_duplicates: Collapse immediate repetitions within each progression.

    Returns:
        A Series or list of tuples, one per matching cadence.
    """
    required = {
        column: accepted if isinstance(accepted, tuple) else (accepted,)
        for column, accepted in (last_row or {}).items()
    }
    progressions = []

    with_feature = phrase_segments[phrase_segments[feature].notna()]
    for (corp, fname, *_), df in with_feature.groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        # (.bfill() replaces the deprecated fillna(method='bfill'))
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: a new group starts on
        # the row following each cadence; fill_value keeps the mask boolean
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, segment in df.groupby(cadence_groups):
            last_r = segment.iloc[-1]
            if last_r.cadence != selected:
                continue
            if any(last_r[column] not in accepted for column, accepted in required.items()):
                continue
            progression = tuple(segment[feature])
            if remove_duplicates:
                # pass a tuple: the helper concatenates its argument with a tuple
                progression = remove_immediate_duplicates(progression)
            progressions.append(progression)
    if as_series:
        return pd.Series(progressions, dtype='object')
    return progressions
# Chord progressions leading to PACs whose final row has numeral 'I' or 'i'.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
value_count_df(chord_progressions, "chord progressions")
Progressions for 602 cadences:
counts %
chord progressions
(I, V7, I) 9 0.014950
(I, V43, I, V6, I, V43, I, V7, I) 6 0.009967
(V7, I) 4 0.006645
(i, i(9#74), i, i6, i, V6, V, #viio43, i6, Ger6, V(64), V, i) 4 0.006645
(I, I64, V7, viio6, I64, V7, viio6, I) 4 0.006645
... ... ...
(i, iv64, #viio7, iv64, #viio65, iv64, #viio7, V2/iv, iv6, V65/iv, iv, #viio43, V65/V, viio7/V, V(64), V7, i) 1 0.001661
(I, V2, I, V2, I, V2, I) 1 0.001661
(I, V6, I, V65, I, V6, I, V6, I, i, viio, V7, Ger6, V7, V6/vi, vi, ii6, V7, I) 1 0.001661
(#viio/vi, vi, ii, V, V/ii, ii, V7, I) 1 0.001661
(V, V65/V, V6, V, V65/V, V6, V65/ii, V43, I, V6, IV6, V, IV6, V6, I, IV, I6, viio6, I6, viio6, vi6, V6, IV6, iii6, ii6, I6, viio6, I6, viio6, vi6, V6, IV6, iii6, ii6, I6, viio6, I6, viio6, vi6, V6, IV6, iii6, ii6, I6, viio6, I) 1 0.001661

499 rows × 2 columns

# Same selection as for the chord progressions, but collecting the 'numeral' column.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
counts %
numeral progressions
(I, V, I) 10 0.016611
(I, V, V, I) 9 0.014950
(I, V, I, V, I, V, I, V, I) 8 0.013289
(I, V, I, V, V, I, V, vi, V, V, vii, V, I, V, ii, V, I) 5 0.008306
(I, V, V, IV, I, ii, V, I) 4 0.006645
... ... ...
(I, V, I, V, I, V, I, V, I, i, vii, V, vii, V, V, vi, ii, V, I) 1 0.001661
(#vii, vi, ii, V, V, ii, V, I) 1 0.001661
(I, V, I, V, V, I, V, I, V, I, V, I, V, I, V, I, V, i, V, i, V, i, V, i, V, I, V, I, V, I, V, I, vi, ii, V, I, ii, V, V, V, I) 1 0.001661
(I, V, I, V, I, V, I, I, vi, IV, vii, V, vii, I) 1 0.001661
(V, V, V, V, V, V, V, V, I, V, IV, V, IV, V, I, IV, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I) 1 0.001661

464 rows × 2 columns

# Collapse immediate repetitions of the same numeral within each progression.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
counts %
(I, V, I) 28 0.046512
(I, V, I, V, I, V, I, V, I) 11 0.018272
(I, V, I, V, I) 10 0.016611
(V, I) 8 0.013289
(I, V, IV, I, ii, V, I) 6 0.009967
... ... ...
(V, i, iv, V, i) 1 0.001661
(V, i, V, i, V, i, V, I, V, I, vi, V, I) 1 0.001661
(i, iv, #vii, iv, #vii, iv, #vii, V, iv, V, iv, #vii, V, vii, V, i) 1 0.001661
(I, V, I, V, I, V, I, V, I, i, vii, V, vii, V, vi, ii, V, I) 1 0.001661
(V, I, V, IV, V, IV, V, I, IV, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I, vii, vi, V, IV, iii, ii, I, vii, I) 1 0.001661

410 rows × 2 columns

PACs ending on scale degree 1#

Scale degrees are expressed with respect to the major scale, regardless of the actual key.

# Bass-note progressions leading to PACs whose final row has bass_note == 0.
# NOTE(review): 0 presumably denotes the tonic in ms3's fifths encoding — confirm.
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# Convert every progression to scale degrees; no `minor` argument is passed here.
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 573 cadences:
counts %
bass progressions
(1, 5, 1) 11 0.019197
(1, 5, 5, 1) 7 0.012216
(1, 2, 1, 7, 1, 2, 1, 5, 1) 6 0.010471
(5, 1) 5 0.008726
(1, 4, 3, 7, 7, 1, 7, 6, 6, 5, 4, 4, 3, 6, 2, 5, 1) 5 0.008726
... ... ...
(1, b6, 5, 5, b6, 5, 5, 4, 5, b6, 2, b3, 4, 5, 5, 1) 1 0.001745
(7, 1, 1, 5, 4, b3, 1, 7, 1, 1, 1, 7, 1, 1, #4, 5, 5, 1, 6, 5, #4, 5, 5, 1) 1 0.001745
(1, 1, 7, 1, 2, 1, 7, b7, b6, 3, 4, 4, #4, #4, 5, 5, 1) 1 0.001745
(1, 4, 1, 4, 1, 4, 1) 1 0.001745
(5, #4, 7, 5, #4, 7, #1, 2, 1, 7, 6, 5, 6, 7, 1, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1) 1 0.001745

441 rows × 2 columns

# Collapse immediate repetitions of the same scale degree within each progression.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
counts %
(1, 5, 1) 20 0.034904
(5, 1) 9 0.015707
(1, 5, 1, 5, 1) 7 0.012216
(1, 2, 3, 4, 5, 1) 6 0.010471
(1, 2, 1, 7, 1, 2, 1, 5, 1) 6 0.010471
... ... ...
(1, b6, 1, 4, 1, b6, 1, 4, 1, 7, 1, 7, 1) 1 0.001745
(1, 4, b3, 7, 1, 5, b6, 3, 4, 1, b2, 1, 7, #4, 5, 1, #4, 5, #4, 5, 1) 1 0.001745
(1, b3, 1, b6, 1, b6, b4, 5, b6, 6, b7, 7, 1, b2, 2, b3, 4, #4, 5, 1) 1 0.001745
(1, b6, 5, b6, 5, 4, b7, 1, 2, b3, 4, 5, 1) 1 0.001745
(5, #4, 7, 5, #4, 7, #1, 2, 1, 7, 6, 5, 6, 7, 1, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1) 1 0.001745

410 rows × 2 columns

def make_sankey(data, labels, node_pos=None, margin={'l': 10, 'r': 10, 'b': 10, 't': 10}, pad=20, color='auto', **kwargs):
    """Create a plotly Sankey figure from a link table and node labels.

    Args:
        data: Object with ``source``, ``target`` and ``value`` attributes
            (e.g. DataFrame columns) describing the links by node index.
        labels: Sequence of node labels; position ``i`` labels node ``i``.
        node_pos: Optional mapping ``{node index: (x, y)}``. Nodes missing from
            the mapping are placed at 0. ``None`` leaves positioning to plotly.
        margin: Figure margins passed to ``update_layout``. The default dict is
            only read, never mutated.
        pad: Padding between nodes.
        color: Node color(s) handed to plotly, or ``'auto'`` to give every
            distinct label its own hue.
        **kwargs: Additional arguments for ``fig.update_layout``.

    Returns:
        The ``go.Figure`` containing the Sankey trace.
    """
    if color == 'auto':
        # dict.fromkeys deduplicates in first-occurrence order, so the
        # label -> color assignment is reproducible. Iterating a set of
        # strings can differ from one interpreter run to the next.
        unique_labels = list(dict.fromkeys(labels))
        # Guard against empty label lists (previously a ZeroDivisionError).
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {
            label: f'hsv({round(i*color_step)}%,100%,100%)'
            for i, label in enumerate(unique_labels)
        }
        color = [unique_colors[label] for label in labels]

    if node_pos is None:
        node_x = node_y = None
    else:
        n_nodes = len(labels)
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in range(n_nodes)]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in range(n_nodes)]

    fig = go.Figure(go.Sankey(
        arrangement='snap',
        node=dict(
            pad=pad,
            label=labels,
            x=node_x,
            y=node_y,
            color=color,
        ),
        link=dict(
            source=data.source,
            target=data.target,
            value=data.value,
        ),
    ))

    fig.update_layout(margin=margin, **kwargs)
    return fig

def progressions2graph_data(progressions, cut_at_stage=None):
    """Convert progressions into staged nodes and weighted edges.

    Each progression is read backwards: its last element is stage 0, the one
    before it stage 1, and so on. Every distinct (stage, element) pair gets its
    own node ID, assigned consecutively from 0 in order of first appearance.

    Args:
        progressions: Iterable of reversible sequences of hashable elements.
        cut_at_stage: Highest stage to include (inclusive), so
            ``cut_at_stage + 1`` stages are kept. ``None`` keeps everything.
            ``0`` keeps only the final elements (the original truthiness check
            treated 0 like ``None``).

    Returns:
        ``(stage_nodes, edge_weights)``: ``stage_nodes`` is a defaultdict
        ``{stage: {element: node_id}}``; ``edge_weights`` is a Counter
        ``{(earlier_node_id, later_node_id): count}``, where "later" is the
        node closer to the end of the progression.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            # Explicit None test so that cut_at_stage=0 is honored.
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes_at_stage = stage_nodes[stage]
            if current not in nodes_at_stage:
                nodes_at_stage[current] = node_counter
                node_counter += 1
            current_node = nodes_at_stage[current]
            if previous_node is not None:
                # Edge points from the earlier element to the later one.
                edge_weights[(current_node, previous_node)] += 1
            previous_node = current_node
    return stage_nodes, edge_weights

def graph_data2sankey(stage_nodes, edge_weights):
    """Build a Sankey figure from staged nodes and weighted edges.

    Args:
        stage_nodes: Mapping ``{stage: {label: node_id}}``; node IDs are
            expected to be the consecutive integers ``0..n-1``.
        edge_weights: Mapping ``{(source_id, target_id): weight}``.

    Returns:
        Whatever ``make_sankey`` returns for the derived link table and labels.
    """
    link_rows = [(src, tgt, weight) for (src, tgt), weight in edge_weights.items()]
    links = pd.DataFrame(link_rows, columns=['source', 'target', 'value'])

    # Invert the per-stage label -> node mappings into a single node -> label lookup.
    label_by_node = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            label_by_node[node] = label

    # List the labels in node-ID order, as the Sankey node list requires.
    ordered_labels = [label_by_node[node_id] for node_id in range(len(label_by_node))]
    return make_sankey(links, ordered_labels)

def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Args:
        progressions: Iterable of progressions, passed to ``progressions2graph_data``.
        cut_at_stage: Forwarded unchanged to ``progressions2graph_data``.

    Returns:
        The figure produced by ``graph_data2sankey``.
    """
    graph = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    nodes_by_stage, weights = graph
    return graph_data2sankey(nodes_by_stage, weights)

Chordal roots for the 3 last stages#

# NOTE(review): progressions2graph_data stops only when stage > cut_at_stage, so
# cut_at_stage=3 shows stages 0-3 (four stages), not the three named in the heading.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)

Complete chords for the last four stages in major#

# Chord progressions of PACs whose final row has numeral 'I' in a major local key.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
# NOTE(review): progressions2graph_data stops only when stage > cut_at_stage, so
# cut_at_stage=4 shows stages 0-4 (five stages), not the four named in the heading.
plot_progressions(pac_major, cut_at_stage=4)

Bass degrees for the last 6 stages.#

# NOTE(review): progressions2graph_data stops only when stage > cut_at_stage, so
# cut_at_stage=7 shows stages 0-7 (eight stages), not the six named in the heading.
plot_progressions(bass_prog_no_dups, cut_at_stage=7)

Bass degrees without accidentals#

def remove_sd_accidentals(t):
    """Reduce each scale-degree string to its last character.

    Args:
        t: Iterable of non-empty strings such as ``'b6'``, ``'#4'`` or ``'1'``.

    Returns:
        A tuple with the final character of every element, e.g. ``('6', '4', '1')``.
    """
    return tuple(degree[-1] for degree in t)
                  
# Strip accidentals first, then collapse repetitions: degrees that differed only
# by accidental (e.g. '4' and '#4') merge once the accidentals are gone.
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)

HCs ending on V#

# Bass progressions leading to half cadences whose final row has numeral 'V',
# converted to scale degrees with ms3.fifths2sd.
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
# Collapse repeated degrees; cut_at_stage=5 keeps stages 0-5 because
# progressions2graph_data stops only when stage > cut_at_stage.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 392 cadences: